Code
df.long <- read.csv("dataLong.csv")Read data.
df.long <- read.csv("dataLong.csv")Let’s load packages/libraries.
# these are mostly for data management/wrangling and visualization
library(tidyverse) # for most things
library(foreign) # for reading SPSS files
library(readxl) # read MS Excel files
library(showtext) # get fonts
library(glue) # simplifies mixing text and code in figures and tables
library(arrow) # support for efficient file formats
library(grateful) # create table+references for packages used in a project
library(styler) # only a one-time installation (it is an Rstudio plugin)
library(car) # for car::recode only
library(skimr) # data skimming
library(lubridate) # for handling dates in data
library(janitor) # for many things in data cleaning
# these are mostly for data analysis and visualization
library(gtsummary)
library(scales)
library(visdat)
library(psych)
library(lme4)
library(nlme)
library(broom.mixed)
library(patchwork)
library(easystats)
library(mice)
library(modelsummary)
library(ggdist)
library(kableExtra)
library(formattable)
library(ggrepel)
library(ggrain)
library(sjPlot)Define a ggplot theme theme_ki(), a standard table function, kbl_ki(), and a color palette based on KI’s design guide, ki_color_palette.
source("ki.R") # this reads an external file and loads whatever is in itdf.long %>%
filter(Group == "Control") %>%
ggplot(aes(x = value, fill = measure)) +
geom_histogram(binwidth = 4) +
facet_grid(measure~time) +
theme_ki() +
scale_fill_manual(values = ki_color_palette) +
labs(title = "Outcomes over time",
subtitle = "Control group",
x = "",
y = "Number of respondents")df.long %>%
filter(!Group == "Control") %>%
ggplot(aes(x = value, fill = measure)) +
geom_histogram(binwidth = 4) +
facet_grid(measure~time) +
theme_ki() +
scale_fill_manual(values = ki_color_palette) +
labs(title = "Outcomes over time",
subtitle = "Intervention group",
x = "",
y = "Number of respondents")df.long %>%
filter(Group == "Control") %>%
ggplot(aes(x = value, fill = measure)) +
geom_density() +
facet_grid(measure~time) +
theme_ki() +
scale_fill_manual(values = ki_color_palette) +
labs(title = "Outcomes over time",
subtitle = "Control group",
x = "",
y = "Density of respondents")df.long %>%
filter(!Group == "Control") %>%
ggplot(aes(x = value, fill = measure)) +
geom_density() +
facet_grid(measure~time) +
theme_ki() +
scale_fill_manual(values = ki_color_palette) +
labs(title = "Outcomes over time",
subtitle = "Intervention group",
x = "",
y = "Density of respondents")What do these distributions look like to you? Normal?
We’ll get back to what this means for our analysis.
df.long %>%
ggplot(aes(x = time, y = value, fill = Group)) +
geom_violin(position = position_dodge(0.9),
alpha = 0.9) +
geom_boxplot(position = position_dodge(0.9),
width = .2,
notch = TRUE,
outlier.shape = NA,
alpha = 0.3) +
facet_wrap(~measure,
ncol = 1) +
theme_ki() +
labs(title = "Outcomes over time",
x = "Time point",
y = "Distribution of outcome measurements")Using the package ggrain we can get this sweet figure that combines a boxplot, a split violin plot, jittered points for individuals, and lines between individuals over time!
df.long %>%
filter(measure == "DEP") %>%
ggplot(aes(x = time, y = value, group = time, fill = Group, color = Group)) +
geom_rain(
#alpha = .6,
boxplot.args = list(color = "black", outlier.shape = NA),
id.long.var = "id"
) +
scale_fill_brewer(palette = "Dark2",
aesthetics = c("color","fill")) +
theme_ki() +
facet_wrap(~Group,
nrow = 2) +
labs(title = "Depression over time")df.long %>%
filter(measure == "DEP") %>%
ggplot(aes(x = time, y = value, group = time, fill = Group, color = Group)) +
geom_rain(
#alpha = .6,
boxplot.args = list(color = "black", outlier.shape = NA),
id.long.var = "id"
) +
scale_fill_brewer(palette = "Dark2",
aesthetics = c("color","fill")) +
theme_ki() +
facet_wrap(~Group,
nrow = 2) +
labs(title = "Depression over time")We will start with DEP as outcome and fit a simple linear model. But first, we’ll split our measure variable into three separate variables (while retaining time as its own variable).
df.model <- df.long %>%
pivot_wider(names_from = "measure",
values_from = "value") %>%
rename(Depression = DEP,
Anxiety = ANX,
Stress = STRESS) %>%
mutate(time = as.integer(time))m1 <- lmer(data = df.model,
Depression ~ time + Group + time*Group + (1 | id),
REML = TRUE)sjPlot::tab_model(m1)| Depression | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 17.01 | 14.40 – 19.62 | <0.001 |
| time | -0.93 | -1.69 – -0.16 | 0.017 |
| Group [MiCBT] | -1.74 | -5.52 – 2.04 | 0.367 |
| time × Group [MiCBT] | -0.92 | -2.05 – 0.20 | 0.108 |
| Random Effects | |||
| σ2 | 36.10 | ||
| τ00id | 70.74 | ||
| ICC | 0.66 | ||
| N id | 106 | ||
| Observations | 368 | ||
| Marginal R2 / Conditional R2 | 0.044 / 0.677 | ||
tidy(m1)# A tibble: 6 × 6
effect group term estimate std.error statistic
<chr> <chr> <chr> <dbl> <dbl> <dbl>
1 fixed <NA> (Intercept) 17.0 1.33 12.8
2 fixed <NA> time -0.927 0.388 -2.39
3 fixed <NA> GroupMiCBT -1.74 1.92 -0.903
4 fixed <NA> time:GroupMiCBT -0.921 0.572 -1.61
5 ran_pars id sd__(Intercept) 8.41 NA NA
6 ran_pars Residual sd__Observation 6.01 NA NA
glance(m1)# A tibble: 1 × 7
nobs sigma logLik AIC BIC REMLcrit df.residual
<int> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
1 368 6.01 -1284. 2581. 2604. 2569. 362
plot(parameters(m1))check_model(m1)What happens if we define time as a factor?
link to marginaleffects!